import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import plotly
import plotly.offline as pyoff
import plotly.graph_objs as go
import plotly.express as px
import chart_studio
import chart_studio.plotly as py
import calmap
import datetime
import tensorflow as tf
from datetime import date
from plotly.subplots import make_subplots
from itertools import cycle, product
from statsmodels.tsa.seasonal import STL
from scipy.stats import boxcox
from pmdarima.arima import ARIMA as pmdARIMA
from pmdarima.arima import ADFTest, KPSSTest, auto_arima
from pmdarima.utils import diff_inv
from statsmodels.tsa.stattools import adfuller
from sklearn.model_selection import TimeSeriesSplit
from keras.layers import LSTM, Dense
from keras import Sequential
from keras.backend import clear_session
from keras.callbacks import EarlyStopping
from keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression
from scipy.special import boxcox1p, inv_boxcox1p
import matplotlib.patches as mpatches
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.model_selection import GridSearchCV
from joblib import delayed
from warnings import catch_warnings
from warnings import filterwarnings
from statsmodels.tsa.forecasting.stl import STLForecast
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
C:\Users\herik\anaconda3\lib\site-packages\numpy\_distributor_init.py:30: UserWarning: loaded more than 1 DLL from .libs:
C:\Users\herik\anaconda3\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
C:\Users\herik\anaconda3\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll
warnings.warn("loaded more than 1 DLL from .libs:"
# Seed for reproducibility of stochastic components (e.g. NN weight init).
SEED = 84796315
# Forecast horizon: number of steps ahead each model predicts.
PREVISOES = 5
# Training epochs for the neural-network models.
EPOCAS = 30
# Mini-batch size used by the Keras generators/models.
BATCH_SIZE = 1000
# Toggle for the slow exhaustive grid searches further below.
EXECUTAR_GRID_SEARCH = False
# Load the raw Olist orders dataset.
dtOrders = pd.read_csv('../data/olist_orders_dataset.csv', encoding = 'utf8')

# Columns that hold timestamps.
dateColumns = [
    'order_purchase_timestamp',
    'order_approved_at',
    'order_delivered_carrier_date',
    'order_delivered_customer_date',
    'order_estimated_delivery_date',
]

# Working copy used for the time-series analysis.
dtOrdersAjustado = dtOrders.copy()

# Parse every date column into datetime64 in one pass.
dtOrdersAjustado[dateColumns] = dtOrdersAjustado[dateColumns].apply(
    pd.to_datetime, format='%Y-%m-%d %H:%M:%S'
)

# Discard rows with any missing value.
dtOrdersAjustado = dtOrdersAjustado.dropna()
dtOrdersAjustado.dtypes
order_id object customer_id object order_status object order_purchase_timestamp datetime64[ns] order_approved_at datetime64[ns] order_delivered_carrier_date datetime64[ns] order_delivered_customer_date datetime64[ns] order_estimated_delivery_date datetime64[ns] dtype: object
# Daily order counts across the full purchase history.
purchase_dates = pd.to_datetime(dtOrdersAjustado['order_purchase_timestamp']).dt.date
idx = pd.date_range(purchase_dates.min(), purchase_dates.max(), normalize=True)
# Count orders per day and fill calendar gaps with zero sales.
seriesHistory = (
    purchase_dates.value_counts(sort=False)
    .sort_index()
    .reindex(idx, fill_value=0)
)
dtHistory = pd.DataFrame(seriesHistory).reset_index()
Principais outliers identificados:
dtHistory
| index | order_purchase_timestamp | |
|---|---|---|
| 0 | 2016-09-15 | 1 |
| 1 | 2016-09-16 | 0 |
| 2 | 2016-09-17 | 0 |
| 3 | 2016-09-18 | 0 |
| 4 | 2016-09-19 | 0 |
| ... | ... | ... |
| 709 | 2018-08-25 | 69 |
| 710 | 2018-08-26 | 73 |
| 711 | 2018-08-27 | 66 |
| 712 | 2018-08-28 | 39 |
| 713 | 2018-08-29 | 11 |
714 rows × 2 columns
# Line chart of raw daily sales.
fig = go.Figure(
    data=[go.Scatter(x=dtHistory['index'],
                     y=dtHistory['order_purchase_timestamp'])],
    layout=go.Layout(
        title='Vendas por dia',
        xaxis={'title': 'Periodo'},
        yaxis={'title': 'Vendas'},
    ),
)
pyoff.iplot(fig)
# Trim the series to the stable window: drops the sparse late-2016 start and
# the truncated tail after 2018-08-17.
seriesHistory = seriesHistory.loc[datetime.date(2017, 1, 1): datetime.date(2018, 8, 17)]
# Calendar range used later to size the forecast / CV windows.
pred_range = pd.date_range(datetime.date(2018, 8, 17), datetime.date(2018, 10, 17))
dtHistory = pd.DataFrame(seriesHistory).reset_index()

# Re-plot the cleaned daily-sales series.
fig = go.Figure(
    data=[go.Scatter(x=dtHistory['index'],
                     y=dtHistory['order_purchase_timestamp'])],
    layout=go.Layout(
        title='Vendas por dia',
        xaxis={'title': 'Periodo'},
        yaxis={'title': 'Vendas'},
    ),
)
pyoff.iplot(fig)
# Calendar heatmap of daily sales (one row of months per year).
fig, caxs = calmap.calendarplot(seriesHistory, daylabels='MTWTFSS', fillcolor='grey',cmap='YlGn', fig_kws=dict(figsize=(18, 9)))
fig.suptitle('Histórico de Vendas', fontsize=22)
# Shrink the plot area to make room for a shared colorbar on the right.
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.03, 0.67])
# Reuse the first year's color mesh as the colorbar's mappable.
fig.colorbar(caxs[0].get_children()[1], cax=cbar_ax)
plt.show()
def add_stl_plot(fig, res, legend):
    """Overlay the components of STL result *res* onto an existing STL figure.

    The first axis (the observed series) is skipped; trend and seasonal are
    drawn as lines, residuals as unconnected points. *legend* labels each
    overlaid series.
    """
    for ax, comp in zip(fig.get_axes()[1:], ("trend", "seasonal", "resid")):
        series = getattr(res, comp)
        if comp == "resid":
            ax.plot(series, marker="o", linestyle="none")
        else:
            ax.plot(series)
        ax.legend(legend, frameon=False)
# Default (non-robust) STL decomposition of the daily sales series.
stl = STL(seriesHistory)
stl_res = stl.fit()
fig = stl_res.plot()
fig.set_size_inches((20, 12))
plt.show()
# Robust STL (down-weights outliers) compared against the non-robust fit.
stl = STL(seriesHistory, robust=True)
res_robust = stl.fit()
fig = res_robust.plot()
fig.set_size_inches((20, 12))
res_non_robust = STL(seriesHistory, robust=False).fit()
add_stl_plot(fig, res_non_robust, ["Robusto", "Não Robusto"])
# Transformations used as alternative model inputs below:
stl = STL(seriesHistory)
res = stl.fit()
# 1) seasonally adjusted series: observed minus the STL seasonal component.
deseasonal = res.observed - res.seasonal
# 2) Box-Cox transform; the +1 shift makes zero-sales days valid input.
bc_history, lmbda = boxcox(seriesHistory+1)
bc_history = pd.Series(bc_history, index=seriesHistory.index)
# 3) weekly (lag-7) differencing to remove seasonality and trend.
diff_history = seriesHistory.diff(7).dropna()
# First week of raw values, kept as the seed to invert the differencing.
xi = seriesHistory.iloc[:7]
# Stacked plot: original series plus the three transformed variants.
fig, axs = plt.subplots(nrows=4, sharex=True, figsize=(14, 8))
seriesHistory.plot(ax=axs[0])
axs[0].set_ylabel('Original')
deseasonal.plot(ax=axs[1])
axs[1].set_ylabel('Deseasonal')
bc_history.plot(ax=axs[2])
axs[2].set_ylabel('Boxcox')
diff_history.plot(ax=axs[3])
axs[3].set_ylabel('Stationary')
fig.align_ylabels()
fig.suptitle('Transformações')
plt.tight_layout()
plt.show()
Os testes abaixo concluíram:
ADF: O teste aceita a hipótese alternativa em que a série é estacionária.
KPSS: O teste aceita a hipótese alternativa em que a série não é estacionária.
ADF teste:
def adf_test(series):
    """Run the Augmented Dickey-Fuller test on *series* and print its stats.

    Null hypothesis: the series has a unit root (is non-stationary); a
    p-value below the chosen alpha rejects it.
    """
    result = adfuller(series, autolag='AIC')
    print(f'ADF: {result[0]}')
    # FIX: the original printed result[1] (the p-value) twice, once
    # mislabelled as 'steps'. The number of lags used is result[2].
    print(f'steps: {result[2]}')
    print(f'p-value: {result[1]}')
    for key, value in result[4].items():
        print('\nValor Critico:')
        print(f' {key}: {value}')
# pmdarima's ADF wrapper: should_diff returns (p-value, needs_differencing).
adf = ADFTest(alpha = 0.05)
adf.should_diff(seriesHistory)
(0.01, False)
adf_test(seriesHistory)
ADF: -2.6163082378564737 steps: 0.0896733120129114 p-value: 0.0896733120129114 Valor Critico: 1%: -3.441694608475642 Valor Critico: 5%: -2.866544718556839 Valor Critico: 10%: -2.5694353738653684
# The weekly-differenced series is clearly stationary under ADF.
adf.should_diff(diff_history)
(0.01, False)
adf_test(diff_history)
ADF: -6.80160615209054 steps: 2.229745766642107e-09 p-value: 2.229745766642107e-09 Valor Critico: 1%: -3.441834071558759 Valor Critico: 5%: -2.8666061267054626 Valor Critico: 10%: -2.569468095872659
KPSS teste:
# KPSS (null hypothesis = stationary): the raw series should be differenced,
kpss = KPSSTest(alpha = 0.05)
kpss.should_diff(seriesHistory)
(0.01, True)
# ...while the lag-7 differenced series should not.
kpss.should_diff(diff_history)
(0.1, False)
Toda a etapa de modelagem será considerada com 5 passos a frente de previsão.
# Accumulator for per-model evaluation metrics (one row per experiment).
result = pd.DataFrame(columns=['Algorithm', 'MSE', 'RMSE', 'MAE', 'Mean_Real_Value', 'Mean_Predict_Value'])
# Rolling-origin cross-validation: 8 folds, train and test windows both
# sized to the 62-day prediction range defined above.
split_range = TimeSeriesSplit(n_splits = 8, max_train_size = pred_range.shape[0], test_size = pred_range.shape[0])
def record(result, algorithm, mse = -1, rmse = -1, mae = -1, mrv = -1, mpv = -1, show = True):
    """Append one row of evaluation metrics to *result* and return the new frame.

    Metrics not supplied default to -1. When *show* is True the updated
    table is displayed (notebook environments only).
    """
    row = {
        'Algorithm': algorithm,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'Mean_Real_Value': mrv,
        'Mean_Predict_Value': mpv,
    }
    result = pd.concat([result, pd.DataFrame([row])], ignore_index=True)
    if show:
        display(result)
    return result
def plot(index, pred, mse, title, fig = None, ax = None, ylabel = ''):
    """Plot predicted vs. real sales.

    When *fig*/*ax* are omitted a standalone figure is created and shown;
    otherwise the curves are drawn on the supplied axis (labelled with
    *ylabel*) and the figure is returned so the report functions can compose
    stacked subplots. *index* gives the forecast dates, *pred* the predicted
    values, *mse* the per-fold MSEs shown in the legend.
    """
    global seriesHistory
    empty_fig = fig is None
    if empty_fig:
        fig, ax = plt.subplots(figsize=(13, 6))
    else:
        ax.set_ylabel(ylabel)
    ax.set_title(title)
    # Legend entry carrying only the mean-MSE text (white patch = invisible).
    patch_ = mpatches.Patch(color = 'white', label = f'MSE: {np.mean(mse):.1e}')
    L1 = ax.legend(handles = [patch_], loc = 'upper left', fancybox = True, framealpha = 0.7, handlelength = 0)
    ax.add_artist(L1)
    sns.lineplot(x = seriesHistory.index, y = seriesHistory, label = 'Real', ax = ax)
    sns.lineplot(x = index, y = pred, label = 'Previsto', ax = ax)
    # Vertical marker at the start of the forecast window.
    ax.axvline(x = index[0], color = 'red')
    ax.legend(loc = 'upper right')
    if empty_fig:
        plt.show()
    else:
        return fig
'''
Fix for the original diff_inv function:
https://github.com/alkaline-ml/pmdarima/issues/410
'''
def diff_inv_fix(x_diff, xi, column, steps = 1):
    """Invert a lag-*steps* difference, re-attaching the seed values *xi*.

    pmdarima's ``diff_inv`` returns cumulative sums without adding the
    original seed observations back in (see the linked issue); this wrapper
    adds the cycled seed values and rebuilds a daily-indexed Series named
    *column*. NOTE(review): assumes *xi* carries a daily DatetimeIndex — the
    output index is a fresh daily range starting at ``xi.index[0]``; confirm
    against callers.
    """
    total_len = len(x_diff) + len(xi)
    ix = pd.date_range(xi.index[0], periods = total_len)
    # Cycle the seed values over the full length so every position gets its
    # corresponding within-cycle base value added back.
    inv = diff_inv(x_diff, steps, xi = xi) + np.fromiter(cycle(xi), count = total_len, dtype = float)
    inv = pd.Series(inv, index = ix, name = column)
    return inv
def reportTSR(data, modelName):
    """Evaluate an autoregressive LinearRegression ("time series regression")
    on *data* with rolling-origin CV; record metrics and plot the last fold.

    *modelName* selects the inverse transformation applied before scoring
    ('Deseasonal', 'BoxCox', 'Stationary'); any other value scores as-is.
    Relies on notebook globals: result, figs, axs, split_range, res, stl,
    lmbda, seriesHistory.
    """
    global result
    global figs
    mse = []
    rmse = []
    mae = []
    mrv = []
    mpv = []
    title = modelName + ' - Time Series Regression'
    indexPlot = 0
    for train_id, test_id in split_range.split(data):
        train, test = data.iloc[train_id], data.iloc[test_id]
        # Lagged-window supervised samples: X = the PREVISOES previous values.
        gen = TimeseriesGenerator(train, train, PREVISOES, batch_size = BATCH_SIZE)
        X_train = gen[0][0]
        y_train = gen[0][1]
        lr = LinearRegression()
        lr.fit(X_train, y_train)
        # Recursive multi-step forecast: feed each prediction back as input.
        X_pred = y_train[-PREVISOES:].reshape(1,-1)
        pred = np.empty(test.shape[0])
        for i in range(len(pred)):
            forecast = lr.predict(X_pred)
            X_pred = np.delete(X_pred, 0, 1)
            X_pred = np.concatenate((X_pred, forecast.reshape(-1, 1)), 1)
            # FIX: extract the scalar explicitly — assigning a 1-element
            # array to a scalar slot is deprecated (error on NumPy >= 1.25).
            pred[i] = forecast.item()
        if modelName == 'Deseasonal':
            # Re-add the last full seasonal cycle, repeated over the horizon.
            last_seasonal = res.seasonal.reindex_like(train).tail(stl.period)
            pred = pred + np.fromiter(cycle(last_seasonal), count = pred.shape[0], dtype = float)
            test = test + res.seasonal.reindex_like(test)
            indexPlot = 1
        elif modelName == 'BoxCox':
            pred = inv_boxcox1p(pred, lmbda)
            test = inv_boxcox1p(test, lmbda)
            indexPlot = 2
        elif modelName == 'Stationary':
            # NOTE(review): diff_history was built with lag 7 but is inverted
            # here with lag PREVISOES (5) — verify this is intentional.
            xi = seriesHistory.reindex_like(train).tail(PREVISOES)
            pred = diff_inv_fix(pred, xi, 'order_purchase_timestamp', PREVISOES).iloc[PREVISOES:]
            test = diff_inv_fix(test, xi, 'order_purchase_timestamp', PREVISOES).iloc[PREVISOES:]
            indexPlot = 3
        mse.append(mean_squared_error(pred, test, squared = True))
        rmse.append(mean_squared_error(pred, test, squared = False))
        mae.append(mean_absolute_error(pred, test))
        mrv.append(np.mean(test))
        mpv.append(np.mean(pred))
    result = record(result, title, np.mean(mse), np.mean(rmse), np.mean(mae), np.mean(mrv), np.mean(mpv), False)
    return plot(test.index, pred, mse, title, figs, axs[indexPlot], modelName)
# Shared 4-row figure: one subplot per transformation variant.
figs, axs = plt.subplots(nrows = 4, sharex = True, figsize = (13,6))
figs.tight_layout()
plt.close()
# Evaluate the regression model on each transformation of the series.
reportTSR(seriesHistory.copy(), 'Original')
reportTSR(deseasonal.copy(), 'Deseasonal')
reportTSR(bc_history.copy(), 'BoxCox')
reportTSR(diff_history.copy(), 'Stationary')
result
| Algorithm | MSE | RMSE | MAE | Mean_Real_Value | Mean_Predict_Value | |
|---|---|---|---|---|---|---|
| 0 | Original - Time Series Regression | 5485.688752 | 63.032946 | 45.057261 | 179.560484 | 164.066513 |
| 1 | Deseasonal - Time Series Regression | 5070.936469 | 57.842363 | 39.592783 | 179.560484 | 166.267257 |
| 2 | BoxCox - Time Series Regression | 5531.781002 | 63.453289 | 45.576572 | 179.560484 | 160.984010 |
| 3 | Stationary - Time Series Regression | 20765.011179 | 123.412496 | 88.116806 | 181.296371 | 175.123977 |
def GSES(data, modelName, alpha, beta, gamma):
    """Grid-search objective: mean CV MSE of Holt-Winters (additive trend and
    seasonality) fit with fixed smoothing parameters alpha/beta/gamma.

    *modelName* selects the inverse transformation applied before scoring.
    Folds where fitting or scoring fails contribute -1 as a sentinel.
    """
    mse = []
    for train_id, test_id in split_range.split(data):
        train, test = data.iloc[train_id], data.iloc[test_id]
        try:
            with catch_warnings():
                filterwarnings("ignore")
                ES = (
                    ExponentialSmoothing(train, trend='add', seasonal='add', seasonal_periods=PREVISOES)
                    .fit(smoothing_level = alpha, smoothing_trend = beta, smoothing_seasonal = gamma, method='ls')
                )
                pred = ES.forecast(test.shape[0])
                if modelName == 'Deseasonal':
                    last_seasonal = res.seasonal.reindex_like(train).tail(stl.period)
                    pred = pred + np.fromiter(cycle(last_seasonal), count = pred.shape[0], dtype = float)
                    test = test + res.seasonal.reindex_like(test)
                elif modelName == 'BoxCox':
                    pred = inv_boxcox1p(pred, lmbda)
                    test = inv_boxcox1p(test, lmbda)
                elif modelName == 'Stationary':
                    xi = seriesHistory.reindex_like(train).tail(PREVISOES)
                    pred = diff_inv_fix(pred, xi, 'order_purchase_timestamp', PREVISOES).iloc[PREVISOES:]
                    test = diff_inv_fix(test, xi, 'order_purchase_timestamp', PREVISOES).iloc[PREVISOES:]
                mse.append(mean_squared_error(pred, test, squared = True))
        # FIX: narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; failed folds still score -1.
        except Exception:
            mse.append(-1)
    return np.mean(mse)
def GSESOPT(data, modelName, trend, season, periods, bias, method):
    """Grid-search objective: mean CV MSE of Holt-Winters with the given
    structural configuration, letting statsmodels optimize the smoothing
    parameters (optimized=True).

    *modelName* selects the inverse transformation applied before scoring.
    Folds where fitting or scoring fails contribute -1 as a sentinel.
    """
    mse = []
    for train_id, test_id in split_range.split(data):
        train, test = data.iloc[train_id], data.iloc[test_id]
        try:
            with catch_warnings():
                filterwarnings("ignore")
                ES = (
                    ExponentialSmoothing(train, trend = trend, seasonal = season, seasonal_periods = periods)
                    .fit(remove_bias = bias, method = method, optimized = True)
                )
                pred = ES.forecast(test.shape[0])
                if modelName == 'Deseasonal':
                    last_seasonal = res.seasonal.reindex_like(train).tail(stl.period)
                    pred = pred + np.fromiter(cycle(last_seasonal), count = pred.shape[0], dtype = float)
                    test = test + res.seasonal.reindex_like(test)
                elif modelName == 'BoxCox':
                    pred = inv_boxcox1p(pred, lmbda)
                    test = inv_boxcox1p(test, lmbda)
                elif modelName == 'Stationary':
                    xi = seriesHistory.reindex_like(train).tail(PREVISOES)
                    pred = diff_inv_fix(pred, xi, 'order_purchase_timestamp', PREVISOES).iloc[PREVISOES:]
                    test = diff_inv_fix(test, xi, 'order_purchase_timestamp', PREVISOES).iloc[PREVISOES:]
                mse.append(mean_squared_error(pred, test, squared = True))
        # FIX: narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; failed folds still score -1.
        except Exception:
            mse.append(-1)
    return np.mean(mse)
def reportES(data, modelName, model_kwargs, fit_kwargs):
    """Evaluate STLForecast + ExponentialSmoothing on *data* with
    rolling-origin CV; record metrics and plot the last fold.

    STLForecast deseasonalizes each training fold via STL, fits
    ExponentialSmoothing (configured by *model_kwargs*/*fit_kwargs*) on the
    adjusted series, and re-adds seasonality when forecasting. *modelName*
    selects the inverse transformation applied before scoring. Relies on
    notebook globals: result, figs, axs, split_range, res, stl, lmbda,
    seriesHistory.
    """
    global result
    global figs
    mse = []
    rmse = []
    mae = []
    mrv = []
    mpv = []
    title = modelName + ' - Exponential Smoothing'
    indexPlot = 0
    for train_id, test_id in split_range.split(data):
        train, test = data.iloc[train_id], data.iloc[test_id]
        ES = ExponentialSmoothing
        stlf = STLForecast(train, ES, model_kwargs = model_kwargs).fit(fit_kwargs = fit_kwargs)
        pred = stlf.forecast(test.shape[0])
        if modelName == 'Deseasonal':
            # Re-add the last full seasonal cycle, repeated over the horizon.
            last_seasonal = res.seasonal.reindex_like(train).tail(stl.period)
            pred = pred + np.fromiter(cycle(last_seasonal), count = pred.shape[0], dtype = float)
            test = test + res.seasonal.reindex_like(test)
            indexPlot = 1
        elif modelName == 'BoxCox':
            pred = inv_boxcox1p(pred, lmbda)
            test = inv_boxcox1p(test, lmbda)
            indexPlot = 2
        elif modelName == 'Stationary':
            # Invert the differencing using the last PREVISOES train values.
            xi = seriesHistory.reindex_like(train).tail(PREVISOES)
            pred = diff_inv_fix(pred, xi, 'order_purchase_timestamp', PREVISOES).iloc[PREVISOES:]
            test = diff_inv_fix(test, xi, 'order_purchase_timestamp', PREVISOES).iloc[PREVISOES:]
            indexPlot = 3
        mse.append(mean_squared_error(pred, test, squared = True))
        rmse.append(mean_squared_error(pred, test, squared = False))
        mae.append(mean_absolute_error(pred, test))
        mrv.append(np.mean(test))
        mpv.append(np.mean(pred))
    result = record(result, title, np.mean(mse), np.mean(rmse), np.mean(mae), np.mean(mrv), np.mean(mpv), False)
    return plot(test.index, pred, mse, title, figs, axs[indexPlot], modelName)
# create a set of exponential smoothing configs to try
def exp_smoothing_configs(seasonal=None):
    """Return every [trend, seasonal, period, remove_bias, method] combination
    to try for ExponentialSmoothing.

    *seasonal* is a list of seasonal-period candidates; None (the default)
    means non-seasonal only, i.e. [None]. 3 trends x 3 seasonal modes x
    len(seasonal) periods x 2 bias flags x 7 optimizers = 126 configs per
    period.
    """
    # FIX: avoid a mutable default argument; None is the "no periods" sentinel.
    p_params = [None] if seasonal is None else seasonal
    t_params = ['add', 'mul', None]
    s_params = ['add', 'mul', None]
    r_params = [True, False]
    method_params = ['L-BFGS-B' , 'TNC', 'SLSQP', 'Powell', 'trust-constr', 'bh', 'ls']
    # itertools.product replaces the five nested loops (identical ordering:
    # the last factor varies fastest, like the innermost loop did).
    return [list(cfg) for cfg in product(t_params, s_params, p_params, r_params, method_params)]
# Full grid of smoothing parameters: 0.0-0.9 in 0.1 steps for each of
# alpha (level), beta (trend) and gamma (seasonal) -> 1000 combinations.
alphas = betas = gammas = np.arange(1, step=0.1)
tuning = pd.DataFrame(product(alphas, betas, gammas), columns=['alpha', 'beta', 'gamma'])
tuning.head()
| alpha | beta | gamma | |
|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 |
| 1 | 0.0 | 0.0 | 0.1 |
| 2 | 0.0 | 0.0 | 0.2 |
| 3 | 0.0 | 0.0 | 0.3 |
| 4 | 0.0 | 0.0 | 0.4 |
%%time
tuning['mse'] = tuning.apply(lambda x: GSES(seriesHistory.copy(), 'Original', x.alpha, x.beta, x.gamma), axis = 1)
Wall time: 54.4 s
tuning.query('mse == mse.min()')
| alpha | beta | gamma | mse | |
|---|---|---|---|---|
| 114 | 0.1 | 0.1 | 0.4 | 5786.805431 |
params_ = exp_smoothing_configs([PREVISOES])
tuning2 = pd.DataFrame(params_, columns=['trend', 'season', 'periods', 'bias', 'method'])
len(tuning2)
126
tuning2.head()
| trend | season | periods | bias | method | |
|---|---|---|---|---|---|
| 0 | add | add | 5 | True | L-BFGS-B |
| 1 | add | add | 5 | True | TNC |
| 2 | add | add | 5 | True | SLSQP |
| 3 | add | add | 5 | True | Powell |
| 4 | add | add | 5 | True | trust-constr |
%%time
if EXECUTAR_GRID_SEARCH:
tuning2['mse'] = tuning2.apply(lambda x: GSESOPT(seriesHistory.copy(), 'Original',\
x.trend, x.season, x.periods, x.bias, x.method),\
axis = 1)
Wall time: 0 ns
# Show the best structural config (only meaningful if the search ran).
if EXECUTAR_GRID_SEARCH:
    display(tuning2.query('mse == mse.min() and mse != -1'))
title = 'Original - Exponential Smoothing'
# Fresh 4-row figure for the Exponential Smoothing plots.
figs, axs = plt.subplots(nrows=4, sharex=True, figsize=(13,6))
figs.align_ylabels()
figs.tight_layout()
plt.close()
# Best parameters found for the untransformed series.
model_kwargs = dict(trend = None, seasonal = None, seasonal_periods = PREVISOES)
fit_kwargs = dict(remove_bias = False, smoothing_level = 0.1, smoothing_trend = 0.1, smoothing_seasonal = 0.3,\
                  method = 'Powell')
reportES(seriesHistory.copy(), 'Original', model_kwargs, fit_kwargs)
result
| Algorithm | MSE | RMSE | MAE | Mean_Real_Value | Mean_Predict_Value | |
|---|---|---|---|---|---|---|
| 0 | Original - Time Series Regression | 5485.688752 | 63.032946 | 45.057261 | 179.560484 | 164.066513 |
| 1 | Deseasonal - Time Series Regression | 5070.936469 | 57.842363 | 39.592783 | 179.560484 | 166.267257 |
| 2 | BoxCox - Time Series Regression | 5531.781002 | 63.453289 | 45.576572 | 179.560484 | 160.984010 |
| 3 | Stationary - Time Series Regression | 20765.011179 | 123.412496 | 88.116806 | 181.296371 | 175.123977 |
| 4 | Original - Exponential Smoothing | 5523.286110 | 60.505490 | 41.256041 | 179.560484 | 170.716575 |
alphas = betas = gammas = np.arange(1, step=0.1)
tuning = pd.DataFrame(product(alphas, betas, gammas), columns=['alpha', 'beta', 'gamma'])
tuning.head()
| alpha | beta | gamma | |
|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 |
| 1 | 0.0 | 0.0 | 0.1 |
| 2 | 0.0 | 0.0 | 0.2 |
| 3 | 0.0 | 0.0 | 0.3 |
| 4 | 0.0 | 0.0 | 0.4 |
%%time
tuning['mse'] = tuning.apply(lambda x: GSES(deseasonal.copy(), 'Deseasonal', x.alpha, x.beta, x.gamma), axis=1)
Wall time: 58.9 s
tuning.query('mse == mse.min()')
| alpha | beta | gamma | mse | |
|---|---|---|---|---|
| 109 | 0.1 | 0.0 | 0.9 | 5077.165222 |
params_ = exp_smoothing_configs([PREVISOES])
tuning2 = pd.DataFrame(params_, columns=['trend', 'season', 'periods', 'bias', 'method'])
%%time
if EXECUTAR_GRID_SEARCH:
tuning2['mse'] = tuning2.apply(lambda x: GSESOPT(deseasonal.copy(), 'Deseasonal',\
x.trend, x.season, x.periods, x.bias, x.method),\
axis = 1)
Wall time: 0 ns
if EXECUTAR_GRID_SEARCH:
display(tuning2.query('mse == mse.min() and mse != -1'))
model_kwargs = dict(trend = None, seasonal = 'add', seasonal_periods = PREVISOES)
fit_kwargs = dict(remove_bias = True, smoothing_level = 0.1, smoothing_trend = 0.1, smoothing_seasonal = 0.9, method = 'Powell')
reportES(deseasonal.copy(), 'Deseasonal', model_kwargs, fit_kwargs)
alphas = betas = gammas = np.arange(1, step=0.1)
tuning = pd.DataFrame(product(alphas, betas, gammas), columns=['alpha', 'beta', 'gamma'])
tuning.head()
| alpha | beta | gamma | |
|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 |
| 1 | 0.0 | 0.0 | 0.1 |
| 2 | 0.0 | 0.0 | 0.2 |
| 3 | 0.0 | 0.0 | 0.3 |
| 4 | 0.0 | 0.0 | 0.4 |
%%time
tuning['mse'] = tuning.apply(lambda x: GSES(bc_history.copy(), 'BoxCox', x.alpha, x.beta, x.gamma), axis=1)
Wall time: 56 s
tuning.query('mse == mse.min() and mse != -1')
| alpha | beta | gamma | mse | |
|---|---|---|---|---|
| 114 | 0.1 | 0.1 | 0.4 | 5505.869056 |
params_ = exp_smoothing_configs([PREVISOES])
tuning2 = pd.DataFrame(params_, columns=['trend', 'season', 'periods', 'bias', 'method'])
%%time
if EXECUTAR_GRID_SEARCH:
tuning2['mse'] = tuning2.apply(lambda x: GSESOPT(bc_history.copy(), 'BoxCox',\
x.trend, x.season, x.periods, x.bias, x.method),\
axis = 1)
Wall time: 0 ns
if EXECUTAR_GRID_SEARCH:
display(tuning2.query('mse == mse.min() and mse != -1'))
model_kwargs = dict(trend='add', seasonal='add', seasonal_periods=PREVISOES)
fit_kwargs = dict(remove_bias = True, smoothing_level = 0.1, smoothing_trend = 0.1, smoothing_seasonal = 0.4,\
method = 'Powell')
reportES(bc_history.copy(), 'BoxCox', model_kwargs, fit_kwargs)
alphas = betas = gammas = np.arange(1, step=0.1)
tuning = pd.DataFrame(product(alphas, betas, gammas), columns=['alpha', 'beta', 'gamma'])
tuning.head()
| alpha | beta | gamma | |
|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 |
| 1 | 0.0 | 0.0 | 0.1 |
| 2 | 0.0 | 0.0 | 0.2 |
| 3 | 0.0 | 0.0 | 0.3 |
| 4 | 0.0 | 0.0 | 0.4 |
%%time
tuning['mse'] = tuning.apply(lambda x: GSES(diff_history.copy(), 'Stationary', x.alpha, x.beta, x.gamma), axis=1)
Wall time: 1min 3s
tuning.query('mse == mse.min() and mse != -1')
| alpha | beta | gamma | mse | |
|---|---|---|---|---|
| 30 | 0.0 | 0.3 | 0.0 | 43322.617332 |
params_ = exp_smoothing_configs([PREVISOES])
tuning2 = pd.DataFrame(params_, columns=['trend', 'season', 'periods', 'bias', 'method'])
%%time
if EXECUTAR_GRID_SEARCH:
tuning2['mse'] = tuning2.apply(lambda x: GSESOPT(diff_history.copy(), 'Stationary',\
x.trend, x.season, x.periods, x.bias, x.method),\
axis = 1)
Wall time: 0 ns
if EXECUTAR_GRID_SEARCH:
display(tuning2.query('mse == mse.min() and mse != -1'))
model_kwargs = dict(trend='add', seasonal='add', seasonal_periods=PREVISOES)
fit_kwargs = dict(remove_bias = False, smoothing_level = 0, smoothing_trend = 0.3, smoothing_seasonal = 0.0,\
method = 'ls')
reportES(diff_history.copy(), 'Stationary', model_kwargs, fit_kwargs)
result
| Algorithm | MSE | RMSE | MAE | Mean_Real_Value | Mean_Predict_Value | |
|---|---|---|---|---|---|---|
| 0 | Original - Time Series Regression | 5485.688752 | 63.032946 | 45.057261 | 179.560484 | 164.066513 |
| 1 | Deseasonal - Time Series Regression | 5070.936469 | 57.842363 | 39.592783 | 179.560484 | 166.267257 |
| 2 | BoxCox - Time Series Regression | 5531.781002 | 63.453289 | 45.576572 | 179.560484 | 160.984010 |
| 3 | Stationary - Time Series Regression | 20765.011179 | 123.412496 | 88.116806 | 181.296371 | 175.123977 |
| 4 | Original - Exponential Smoothing | 5523.286110 | 60.505490 | 41.256041 | 179.560484 | 170.716575 |
| 5 | Deseasonal - Exponential Smoothing | 4995.372698 | 57.454750 | 38.814970 | 179.560484 | 167.407451 |
| 6 | BoxCox - Exponential Smoothing | 5295.550187 | 60.651937 | 41.800642 | 179.560484 | 174.995807 |
| 7 | Stationary - Exponential Smoothing | 48372.797213 | 196.181758 | 141.789213 | 181.296371 | 178.154334 |
def reportArima(arimaModel, modelName):
    """Refit *arimaModel* (a pmdarima ARIMA) on each rolling-origin CV fold,
    record metrics and plot the last fold.

    NOTE(review): this function iterates the module-level ``data`` series
    (assigned right before each call) rather than taking it as a parameter —
    keep the ``data = ...`` assignment adjacent to every call. *modelName*
    selects the inverse transformation applied before scoring. Also relies
    on globals: result, figs, axs, split_range, res, stl, lmbda,
    seriesHistory.
    """
    global result
    global figs
    mse = []
    rmse = []
    mae = []
    mrv = []
    mpv = []
    # The model's repr (e.g. "ARIMA(2,1,1)(1,0,1)[5]") labels the result row.
    title = modelName + ' - ' + arimaModel.__str__().strip()
    indexPlot = 0
    for train_id, test_id in split_range.split(data):
        train, test = data.iloc[train_id], data.iloc[test_id]
        arimaModel.fit(train)
        pred = arimaModel.predict(test.shape[0])
        if modelName == 'Deseasonal':
            # Re-add the last full seasonal cycle, repeated over the horizon.
            last_seasonal = res.seasonal.reindex_like(train).tail(stl.period)
            pred = pred + np.fromiter(cycle(last_seasonal), count = pred.shape[0], dtype = float)
            test = test + res.seasonal.reindex_like(test)
            indexPlot = 1
        elif modelName == 'BoxCox':
            # Invert the Box-Cox(+1) transform back to the sales scale.
            pred = inv_boxcox1p(pred, lmbda)
            test = inv_boxcox1p(test, lmbda)
            indexPlot = 2
        elif modelName == 'Stationary':
            # Invert the differencing using the last PREVISOES train values.
            xi = seriesHistory.reindex_like(train).tail(PREVISOES)
            pred = diff_inv_fix(pred, xi, 'order_purchase_timestamp', PREVISOES).iloc[PREVISOES:]
            test = diff_inv_fix(test, xi, 'order_purchase_timestamp', PREVISOES).iloc[PREVISOES:]
            indexPlot = 3
        mse.append(mean_squared_error(pred, test, squared = True))
        rmse.append(mean_squared_error(pred, test, squared = False))
        mae.append(mean_absolute_error(pred, test))
        mrv.append(np.mean(test))
        mpv.append(np.mean(pred))
    result = record(result, title, np.mean(mse), np.mean(rmse), np.mean(mae), np.mean(mrv), np.mean(mpv), False)
    return plot(test.index, pred, mse, title, figs, axs[indexPlot], modelName)
# Fresh 4-row figure for the ARIMA plots.
figs, axs = plt.subplots(nrows=4, sharex=True, figsize=(13,6))
figs.align_ylabels()
figs.tight_layout()
plt.close()
# ACF/PACF of the first-differenced series to guide ARIMA order selection.
lags = 90
with catch_warnings():
    filterwarnings("ignore")
    fig, ax = plt.subplots(2, figsize=(12, 6), sharex=True)
    plot_acf(seriesHistory.diff().dropna(), ax = ax[0], lags = lags, missing = 'drop')
    plot_pacf(seriesHistory.diff().dropna(), ax = ax[1], lags = lags)
    plt.show()
%%time
data = seriesHistory.copy()
arimaModel = auto_arima(seriesHistory.copy(), m = PREVISOES)
arimaModel
Wall time: 17.6 s
ARIMA(order=(2, 1, 1), scoring_args={}, seasonal_order=(1, 0, 1, 5),
suppress_warnings=True, with_intercept=False)
reportArima(arimaModel, 'Original')
%%time
data = deseasonal.copy()
arimaModel = auto_arima(data, seasonal = False)
arimaModel
Wall time: 2.53 s
ARIMA(order=(2, 1, 1), scoring_args={}, suppress_warnings=True)
reportArima(arimaModel, 'Deseasonal')
%%time
data = bc_history.copy()
arimaModel = auto_arima(data, m = PREVISOES)
arimaModel
Wall time: 35.9 s
ARIMA(order=(4, 1, 2), scoring_args={}, seasonal_order=(0, 0, 0, 5),
suppress_warnings=True)
reportArima(arimaModel, 'BoxCox')
%%time
data = diff_history.copy()
arimaModel = auto_arima(data, seasonal = False)
arimaModel
Wall time: 5.87 s
ARIMA(order=(3, 0, 3), scoring_args={}, suppress_warnings=True,
with_intercept=False)
reportArima(arimaModel, 'Stationary')
result
| Algorithm | MSE | RMSE | MAE | Mean_Real_Value | Mean_Predict_Value | |
|---|---|---|---|---|---|---|
| 0 | Original - Time Series Regression | 5485.688752 | 63.032946 | 45.057261 | 179.560484 | 164.066513 |
| 1 | Deseasonal - Time Series Regression | 5070.936469 | 57.842363 | 39.592783 | 179.560484 | 166.267257 |
| 2 | BoxCox - Time Series Regression | 5531.781002 | 63.453289 | 45.576572 | 179.560484 | 160.984010 |
| 3 | Stationary - Time Series Regression | 20765.011179 | 123.412496 | 88.116806 | 181.296371 | 175.123977 |
| 4 | Original - Exponential Smoothing | 5523.286110 | 60.505490 | 41.256041 | 179.560484 | 170.716575 |
| 5 | Deseasonal - Exponential Smoothing | 4995.372698 | 57.454750 | 38.814970 | 179.560484 | 167.407451 |
| 6 | BoxCox - Exponential Smoothing | 5295.550187 | 60.651937 | 41.800642 | 179.560484 | 174.995807 |
| 7 | Stationary - Exponential Smoothing | 48372.797213 | 196.181758 | 141.789213 | 181.296371 | 178.154334 |
| 8 | Original - ARIMA(2,1,1)(1,0,1)[5] | 5734.420367 | 64.119709 | 46.036417 | 179.560484 | 168.922554 |
| 9 | Deseasonal - ARIMA(2,1,1)(0,0,0)[0] intercept | 9591.302944 | 77.051189 | 59.640244 | 179.560484 | 191.568473 |
| 10 | BoxCox - ARIMA(4,1,2)(0,0,0)[5] intercept | 11302.021938 | 88.121279 | 69.421193 | 179.560484 | 190.763836 |
| 11 | Stationary - ARIMA(3,0,3)(0,0,0)[0] | 20758.544868 | 122.868983 | 87.811596 | 181.296371 | 168.079295 |